{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 准备工作\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from lxml.html import fromstring\n",
    "import time\n",
    "from random import random\n",
    "from requests_html import HTMLSession"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-2-dd1f2cad3fb0>:10: DeprecationWarning: use options instead of chrome_options\n",
      "  driver = webdriver.Chrome( chrome_options = opts) #desired_capabilities=caps,\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.desired_capabilities import DesiredCapabilities\n",
    "\n",
    "opts = webdriver.ChromeOptions()\n",
    "opts.add_argument('--no-sandbox')# 解决DevToolsActivePort文件不存在的报错\n",
    "opts.add_argument('window-size=1920x3000') # 指定浏览器分辨率\n",
    "opts.add_argument('--disable-gpu') # 谷歌文档提到需要加上一这个属性来规避bug\n",
    "opts.add_argument('--hide-scrollbars') # 隐藏滚动条, 应对些特殊页面\n",
    "\n",
    "driver = webdriver.Chrome( chrome_options = opts) #desired_capabilities=caps,"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 进入CNKI\n",
    "driver.get(\"https://www.cnki.net/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.find_element_by_xpath('//*[@id=\"headerBox\"]/div[1]/div/div/div[4]/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.find_element_by_xpath('/html/body/form/div[4]/div/div/div[9]/a[2]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'中山大学南方学院'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 检查登陆状况\n",
    "element = driver.find_element_by_id('Ecp_loginShowName1')\n",
    "element.get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 打开高级检索\n",
    "driver.find_element_by_id('highSearch').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'CDwindow-8D8283095CD2D3246CFBF4EE753ABEC4'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## 当前窗口\n",
    "driver.current_window_handle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-8D8283095CD2D3246CFBF4EE753ABEC4',\n",
       " 'CDwindow-1CD9E637B649BF5766B745C8B5E27E72']"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## 查看所有的窗口信息\n",
    "driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-11-704fdf3805c1>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# 切换窗口\n",
    "driver.switch_to_window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 选择学术期刊\n",
    "driver.find_element_by_xpath('//li[@data-id=\"xsqk\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 选择专业检索\n",
    "driver.find_element_by_name('majorSearch').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 输入关键词\n",
    "element=driver.find_element_by_xpath('/html/body/div[2]/div/div[2]/div/div[1]/div[1]/div[2]/textarea')\n",
    "element.clear()\n",
    "element.send_keys('SU = \"设计\" AND  (TI =\"人工智能\" OR  TI =\"大数据\"  OR TI = \"AI\" OR TI = \"big data\")')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 选中CSSCI\n",
    "driver.find_element_by_xpath('//input[@key=\"CSI\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 选中北大核心\n",
    "driver.find_element_by_xpath('//input[@key=\"HX\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 检索\n",
    "driver.find_element_by_xpath('//input[@value=\"检索\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1,375'"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 显示文章数量\n",
    "driver.find_element_by_xpath('//*[@id=\"countPageDiv\"]/span[1]/em').get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 更换每页文章数量\n",
    "element = driver.find_element_by_id('perPageDiv')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_xpath('//*[@id=\"perPageDiv\"]/div/span')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>基于大数据与信息技术的拖拉机零部件供应链</td>\n",
       "      <td>王弥</td>\n",
       "      <td>农机化研究</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>智能制造和大数据挖掘在农业机械设计中的应用</td>\n",
       "      <td>杜伯阳</td>\n",
       "      <td>农机化研究</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>人工智能教育的含义界定与原理挖掘</td>\n",
       "      <td>彭绍东</td>\n",
       "      <td>中国电化教育</td>\n",
       "      <td>2021-06-08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>218.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>人工智能应用场景的界定与开发</td>\n",
       "      <td>李梦薇; 徐峰; 高芳</td>\n",
       "      <td>中国科技论坛</td>\n",
       "      <td>2021-06-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>近5年图情SSCI期刊人工智能伦理研究文献分析与启示</td>\n",
       "      <td>黄崑; 徐晓婷; 黎安润泽; 徐峰</td>\n",
       "      <td>现代情报</td>\n",
       "      <td>2021-06-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>405.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>教育信息化2.0背景下省级教育大数据平台建设研究  网络首发</td>\n",
       "      <td>汤岭球</td>\n",
       "      <td>当代教育论坛</td>\n",
       "      <td>2021-05-26 18:52</td>\n",
       "      <td>NaN</td>\n",
       "      <td>334.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>大数据挖掘技术的光流场图像匹配方法设计</td>\n",
       "      <td>黄凯宁; 郭有强; 杨静</td>\n",
       "      <td>激光杂志</td>\n",
       "      <td>2021-05-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>大数据下基于机器学习的项目智能成本管理研究——以A风景园林规划研究院规划设计类项目为例</td>\n",
       "      <td>程平; 彭兰雅; 辜榕容</td>\n",
       "      <td>财会通讯</td>\n",
       "      <td>2021-05-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>179.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>AI视觉识别在食品工业中的运用</td>\n",
       "      <td>熊章军</td>\n",
       "      <td>食品工业</td>\n",
       "      <td>2021-05-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>13.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>新时期中欧人工智能发展战略与政策环境的比较研究</td>\n",
       "      <td>关皓元; 高杰</td>\n",
       "      <td>管理现代化</td>\n",
       "      <td>2021-05-17 15:21</td>\n",
       "      <td>NaN</td>\n",
       "      <td>268.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>大数据技术视域下智慧图书馆伦理危机与控制研究</td>\n",
       "      <td>陆康; 刘慧; 曹畋</td>\n",
       "      <td>高校图书馆工作</td>\n",
       "      <td>2021-05-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>86.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>12</td>\n",
       "      <td>药物研发领域人工智能应用与创新发展策略探讨  网络首发</td>\n",
       "      <td>茅鸯对; 柳鹏程</td>\n",
       "      <td>中国新药与临床杂志</td>\n",
       "      <td>2021-05-13 15:25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>342.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>13</td>\n",
       "      <td>智能时代的中小学人工智能教育：总体定位与核心内容领域</td>\n",
       "      <td>卢宇; 汤筱玙; 宋佳宸; 余胜泉</td>\n",
       "      <td>中国远程教育</td>\n",
       "      <td>2021-05-12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>810.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>14</td>\n",
       "      <td>大数据环境下基于谱机器学习的云物流资源配置</td>\n",
       "      <td>张人龙; 刘小红</td>\n",
       "      <td>统计与决策</td>\n",
       "      <td>2021-05-10 10:39</td>\n",
       "      <td>NaN</td>\n",
       "      <td>230.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>15</td>\n",
       "      <td>酶工程：从人工设计到人工智能  网络首发</td>\n",
       "      <td>王雅丽;付友思;陈俊宏;黄佳城;廖浪星</td>\n",
       "      <td>化工学报</td>\n",
       "      <td>2021-05-08 14:42</td>\n",
       "      <td>NaN</td>\n",
       "      <td>166.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>论技术向善何以可能——人工智能教育伦理的逻辑起点</td>\n",
       "      <td>孙田琳子</td>\n",
       "      <td>高教探索</td>\n",
       "      <td>2021-05-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>60.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>17</td>\n",
       "      <td>大数据下区域学业质量分析的困境及其破解</td>\n",
       "      <td>曹雷</td>\n",
       "      <td>教学与管理</td>\n",
       "      <td>2021-05-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>51.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>基于大数据的产品族本体造型意象挖掘方法研究  网络首发</td>\n",
       "      <td>王鹏; 朱韦龙</td>\n",
       "      <td>图学学报</td>\n",
       "      <td>2021-04-30 10:50</td>\n",
       "      <td>NaN</td>\n",
       "      <td>153.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>19</td>\n",
       "      <td>基于智能制造和大数据挖掘的农机数字化设计研究</td>\n",
       "      <td>任燕; 崔庚彦</td>\n",
       "      <td>农机化研究</td>\n",
       "      <td>2021-04-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>201.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>20</td>\n",
       "      <td>智能化学本评估初探——基于AI教师主讲课堂的试验研究</td>\n",
       "      <td>阮婷婷; 黄甫全; 曾文婕</td>\n",
       "      <td>教育研究与实验</td>\n",
       "      <td>2021-04-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>41.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>21</td>\n",
       "      <td>语义分析及向量化大数据XSS入侵识别</td>\n",
       "      <td>张海军; 陈映辉</td>\n",
       "      <td>南开大学学报(自然科学版)</td>\n",
       "      <td>2021-04-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>22</td>\n",
       "      <td>大数据平台下化工企业消防安全风险分析指标设计</td>\n",
       "      <td>王滨滨;褚新颖;程诚;王禹寒;胡文倩</td>\n",
       "      <td>南开大学学报(自然科学版)</td>\n",
       "      <td>2021-04-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>23</td>\n",
       "      <td>知识牵引与数据驱动的兵棋AI设计及关键技术  网络首发</td>\n",
       "      <td>程恺; 陈刚; 余晓晗; 刘满; 邵天浩</td>\n",
       "      <td>系统工程与电子技术</td>\n",
       "      <td>2021-04-19 08:31</td>\n",
       "      <td>NaN</td>\n",
       "      <td>117.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>24</td>\n",
       "      <td>基于网络舆情大数据的公共政策评价研究</td>\n",
       "      <td>邱尔丽; 张竞; 王雨舟; 曹攀</td>\n",
       "      <td>领导科学</td>\n",
       "      <td>2021-04-16</td>\n",
       "      <td>NaN</td>\n",
       "      <td>144.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>25</td>\n",
       "      <td>基于Mapreduce的多源多模态大数据检索方法研究</td>\n",
       "      <td>魏秀卓; 赵慧南</td>\n",
       "      <td>计算机仿真</td>\n",
       "      <td>2021-04-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>26</td>\n",
       "      <td>从学习效果和教育公平的角度看高等教育人工智能应用——一项基于多个数据库英文同行评审期刊文献的综述</td>\n",
       "      <td>郝丹; 肖俊洪</td>\n",
       "      <td>现代教育技术</td>\n",
       "      <td>2021-04-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>176.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>27</td>\n",
       "      <td>“城市仿真”大数据平台简介</td>\n",
       "      <td>NaN</td>\n",
       "      <td>交通运输系统工程与信息</td>\n",
       "      <td>2021-04-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>33.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>28</td>\n",
       "      <td>人工智能非主体性前提下侵权责任承担机制研究</td>\n",
       "      <td>付其运</td>\n",
       "      <td>法学杂志</td>\n",
       "      <td>2021-04-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>188.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>29</td>\n",
       "      <td>机用碳纤维/ABS复合材料人工智能辅助成型工艺关键技术研究</td>\n",
       "      <td>黄罡</td>\n",
       "      <td>化工新型材料</td>\n",
       "      <td>2021-04-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>38.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>30</td>\n",
       "      <td>城市轨道交通勘测大数据的集成与应用</td>\n",
       "      <td>任传斌; 于淼; 李珂; 冯增文</td>\n",
       "      <td>地质论评</td>\n",
       "      <td>2021-04-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>31</td>\n",
       "      <td>人工智能驱动下智慧养老服务模式构建研究</td>\n",
       "      <td>赵奕钧; 邓大松</td>\n",
       "      <td>江淮论坛</td>\n",
       "      <td>2021-04-12 10:39</td>\n",
       "      <td>NaN</td>\n",
       "      <td>842.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>32</td>\n",
       "      <td>电子科技大学周军教授团队在芯片奥林匹克会议ISSCC发表人工智能芯片论文</td>\n",
       "      <td>NaN</td>\n",
       "      <td>信息网络安全</td>\n",
       "      <td>2021-04-10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>29.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>33</td>\n",
       "      <td>面向重大公共事务决策风险治理的大数据行动框架</td>\n",
       "      <td>毕凌燕; 张海璇; 左文明</td>\n",
       "      <td>科技管理研究</td>\n",
       "      <td>2021-04-10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>48.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>34</td>\n",
       "      <td>共智融合的大数据智能化人才培养研究与实践</td>\n",
       "      <td>冯永; 钟将; 王茜; 李学明</td>\n",
       "      <td>中国电化教育</td>\n",
       "      <td>2021-04-08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>400.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>35</td>\n",
       "      <td>从教学样式到学习范式：人工智能环境下学习的通用设计转化</td>\n",
       "      <td>杨绪辉</td>\n",
       "      <td>中国电化教育</td>\n",
       "      <td>2021-04-08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>285.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>36</td>\n",
       "      <td>人工智能教学中“知识建构、STEM、创客”三位一体教学模型的设计与应用</td>\n",
       "      <td>丁美荣; 王同聚</td>\n",
       "      <td>电化教育研究</td>\n",
       "      <td>2021-04-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>751.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>37</td>\n",
       "      <td>基于大数据分析技术的智能教学系统</td>\n",
       "      <td>刘一虹</td>\n",
       "      <td>现代电子技术</td>\n",
       "      <td>2021-03-30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>291.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>38</td>\n",
       "      <td>基于大数据的人脸识别方法</td>\n",
       "      <td>王彦秋; 冯英伟</td>\n",
       "      <td>现代电子技术</td>\n",
       "      <td>2021-03-30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>692.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>39</td>\n",
       "      <td>基于大数据的多群体用户画像构建系统设计</td>\n",
       "      <td>郭娜; 魏荣凯</td>\n",
       "      <td>现代电子技术</td>\n",
       "      <td>2021-03-30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>479.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>40</td>\n",
       "      <td>人工智能环境下广告创意人才的培养</td>\n",
       "      <td>赵朴</td>\n",
       "      <td>出版广角</td>\n",
       "      <td>2021-03-30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>73.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>41</td>\n",
       "      <td>基于MapReduce的大数据在线聚集优化设计</td>\n",
       "      <td>李骏</td>\n",
       "      <td>河北大学学报(自然科学版)</td>\n",
       "      <td>2021-03-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>42</td>\n",
       "      <td>航空大数据研究综述</td>\n",
       "      <td>赵学武;吴宁;王军;阮利;李玲玲</td>\n",
       "      <td>计算机科学与探索</td>\n",
       "      <td>2021-03-23 16:56</td>\n",
       "      <td>NaN</td>\n",
       "      <td>342.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>43</td>\n",
       "      <td>人工智能技术的船舶航行轨迹控制算法</td>\n",
       "      <td>殷慧</td>\n",
       "      <td>舰船科学技术</td>\n",
       "      <td>2021-03-23</td>\n",
       "      <td>NaN</td>\n",
       "      <td>41.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>44</td>\n",
       "      <td>大数据挖掘算法无人船信息管理系统</td>\n",
       "      <td>解莹</td>\n",
       "      <td>舰船科学技术</td>\n",
       "      <td>2021-03-23</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>45</td>\n",
       "      <td>负责任的人工智能与设计创新</td>\n",
       "      <td>王韫; 徐迎庆</td>\n",
       "      <td>包装工程</td>\n",
       "      <td>2021-03-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>176.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>46</td>\n",
       "      <td>AI设计下的文本视觉问答技术</td>\n",
       "      <td>晋赞霞; 覃京燕; 殷绪成</td>\n",
       "      <td>包装工程</td>\n",
       "      <td>2021-03-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>47</td>\n",
       "      <td>AI设计下的智能驾驶场景文本识别技术</td>\n",
       "      <td>梁敏; 秦海波; 覃京燕; 殷绪成</td>\n",
       "      <td>包装工程</td>\n",
       "      <td>2021-03-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>127.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>48</td>\n",
       "      <td>唯物史观论域下人工智能的伦理问题思考与合伦理设计</td>\n",
       "      <td>田鹏颖; 周鑫</td>\n",
       "      <td>宁夏社会科学</td>\n",
       "      <td>2021-03-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>275.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>49</td>\n",
       "      <td>基于大数据糖尿病患者膳食管理系统的设计</td>\n",
       "      <td>章琦; 章玮; 白正玉</td>\n",
       "      <td>中国全科医学</td>\n",
       "      <td>2021-03-18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>203.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>50</td>\n",
       "      <td>人工智能算法在铁道车辆动力学仿真中的应用进展</td>\n",
       "      <td>唐兆;董少迪;罗仁;蒋涛;邓锐</td>\n",
       "      <td>交通运输工程学报</td>\n",
       "      <td>2021-03-16 11:06</td>\n",
       "      <td>NaN</td>\n",
       "      <td>237.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Unnamed: 0                                                篇名  \\\n",
       "0            1                              基于大数据与信息技术的拖拉机零部件供应链   \n",
       "1            2                             智能制造和大数据挖掘在农业机械设计中的应用   \n",
       "2            3                                  人工智能教育的含义界定与原理挖掘   \n",
       "3            4                                    人工智能应用场景的界定与开发   \n",
       "4            5                        近5年图情SSCI期刊人工智能伦理研究文献分析与启示   \n",
       "5            6                    教育信息化2.0背景下省级教育大数据平台建设研究  网络首发   \n",
       "6            7                               大数据挖掘技术的光流场图像匹配方法设计   \n",
       "7            8       大数据下基于机器学习的项目智能成本管理研究——以A风景园林规划研究院规划设计类项目为例   \n",
       "8            9                                   AI视觉识别在食品工业中的运用   \n",
       "9           10                           新时期中欧人工智能发展战略与政策环境的比较研究   \n",
       "10          11                            大数据技术视域下智慧图书馆伦理危机与控制研究   \n",
       "11          12                       药物研发领域人工智能应用与创新发展策略探讨  网络首发   \n",
       "12          13                        智能时代的中小学人工智能教育：总体定位与核心内容领域   \n",
       "13          14                             大数据环境下基于谱机器学习的云物流资源配置   \n",
       "14          15                              酶工程：从人工设计到人工智能  网络首发   \n",
       "15          16                          论技术向善何以可能——人工智能教育伦理的逻辑起点   \n",
       "16          17                               大数据下区域学业质量分析的困境及其破解   \n",
       "17          18                       基于大数据的产品族本体造型意象挖掘方法研究  网络首发   \n",
       "18          19                            基于智能制造和大数据挖掘的农机数字化设计研究   \n",
       "19          20                        智能化学本评估初探——基于AI教师主讲课堂的试验研究   \n",
       "20          21                                语义分析及向量化大数据XSS入侵识别   \n",
       "21          22                            大数据平台下化工企业消防安全风险分析指标设计   \n",
       "22          23                       知识牵引与数据驱动的兵棋AI设计及关键技术  网络首发   \n",
       "23          24                                基于网络舆情大数据的公共政策评价研究   \n",
       "24          25                        基于Mapreduce的多源多模态大数据检索方法研究   \n",
       "25          26  从学习效果和教育公平的角度看高等教育人工智能应用——一项基于多个数据库英文同行评审期刊文献的综述   \n",
       "26          27                                     “城市仿真”大数据平台简介   \n",
       "27          28                             人工智能非主体性前提下侵权责任承担机制研究   \n",
       "28          29                     机用碳纤维/ABS复合材料人工智能辅助成型工艺关键技术研究   \n",
       "29          30                                 城市轨道交通勘测大数据的集成与应用   \n",
       "30          31                               人工智能驱动下智慧养老服务模式构建研究   \n",
       "31          32              电子科技大学周军教授团队在芯片奥林匹克会议ISSCC发表人工智能芯片论文   \n",
       "32          33                            面向重大公共事务决策风险治理的大数据行动框架   \n",
       "33          34                              共智融合的大数据智能化人才培养研究与实践   \n",
       "34          35                       从教学样式到学习范式：人工智能环境下学习的通用设计转化   \n",
       "35          36               人工智能教学中“知识建构、STEM、创客”三位一体教学模型的设计与应用   \n",
       "36          37                                  基于大数据分析技术的智能教学系统   \n",
       "37          38                                      基于大数据的人脸识别方法   \n",
       "38          39                               基于大数据的多群体用户画像构建系统设计   \n",
       "39          40                                  人工智能环境下广告创意人才的培养   \n",
       "40          41                           基于MapReduce的大数据在线聚集优化设计   \n",
       "41          42                                         航空大数据研究综述   \n",
       "42          43                                 人工智能技术的船舶航行轨迹控制算法   \n",
       "43          44                                  大数据挖掘算法无人船信息管理系统   \n",
       "44          45                                     负责任的人工智能与设计创新   \n",
       "45          46                                    AI设计下的文本视觉问答技术   \n",
       "46          47                                AI设计下的智能驾驶场景文本识别技术   \n",
       "47          48                          唯物史观论域下人工智能的伦理问题思考与合伦理设计   \n",
       "48          49                               基于大数据糖尿病患者膳食管理系统的设计   \n",
       "49          50                            人工智能算法在铁道车辆动力学仿真中的应用进展   \n",
       "\n",
       "                      作者             刊名              发表时间  被引     下载  操作  \n",
       "0                     王弥          农机化研究        2021-06-15 NaN    NaN  下载  \n",
       "1                    杜伯阳          农机化研究        2021-06-15 NaN    NaN  下载  \n",
       "2                    彭绍东         中国电化教育        2021-06-08 NaN  218.0  下载  \n",
       "3            李梦薇; 徐峰; 高芳         中国科技论坛        2021-06-05 NaN    NaN  下载  \n",
       "4      黄崑; 徐晓婷; 黎安润泽; 徐峰           现代情报        2021-06-01 NaN  405.0  下载  \n",
       "5                    汤岭球         当代教育论坛  2021-05-26 18:52 NaN  334.0  下载  \n",
       "6           黄凯宁; 郭有强; 杨静           激光杂志        2021-05-25 NaN   11.0  下载  \n",
       "7           程平; 彭兰雅; 辜榕容           财会通讯        2021-05-20 NaN  179.0  下载  \n",
       "8                    熊章军           食品工业        2021-05-20 NaN   13.0  下载  \n",
       "9                关皓元; 高杰          管理现代化  2021-05-17 15:21 NaN  268.0  下载  \n",
       "10            陆康; 刘慧; 曹畋        高校图书馆工作        2021-05-15 NaN   86.0  下载  \n",
       "11              茅鸯对; 柳鹏程      中国新药与临床杂志  2021-05-13 15:25 NaN  342.0  下载  \n",
       "12     卢宇; 汤筱玙; 宋佳宸; 余胜泉         中国远程教育        2021-05-12 NaN  810.0  下载  \n",
       "13              张人龙; 刘小红          统计与决策  2021-05-10 10:39 NaN  230.0  下载  \n",
       "14   王雅丽;付友思;陈俊宏;黄佳城;廖浪星           化工学报  2021-05-08 14:42 NaN  166.0  下载  \n",
       "15                  孙田琳子           高教探索        2021-05-05 NaN   60.0  下载  \n",
       "16                    曹雷          教学与管理        2021-05-01 NaN   51.0  下载  \n",
       "17               王鹏; 朱韦龙           图学学报  2021-04-30 10:50 NaN  153.0  下载  \n",
       "18               任燕; 崔庚彦          农机化研究        2021-04-28 NaN  201.0  下载  \n",
       "19         阮婷婷; 黄甫全; 曾文婕        教育研究与实验        2021-04-20 NaN   41.0  下载  \n",
       "20              张海军; 陈映辉  南开大学学报(自然科学版)        2021-04-20 NaN    7.0  下载  \n",
       "21    王滨滨;褚新颖;程诚;王禹寒;胡文倩  南开大学学报(自然科学版)        2021-04-20 NaN    7.0  下载  \n",
       "22  程恺; 陈刚; 余晓晗; 刘满; 邵天浩      系统工程与电子技术  2021-04-19 08:31 NaN  117.0  下载  \n",
       "23      邱尔丽; 张竞; 王雨舟; 曹攀           领导科学        2021-04-16 NaN  144.0  下载  \n",
       "24              魏秀卓; 赵慧南          计算机仿真        2021-04-15 NaN   36.0  下载  \n",
       "25               郝丹; 肖俊洪         现代教育技术        2021-04-15 NaN  176.0  下载  \n",
       "26                   NaN    交通运输系统工程与信息        2021-04-15 NaN   33.0  下载  \n",
       "27                   付其运           法学杂志        2021-04-15 NaN  188.0  下载  \n",
       "28                    黄罡         化工新型材料        2021-04-15 NaN   38.0  下载  \n",
       "29      任传斌; 于淼; 李珂; 冯增文           地质论评        2021-04-15 NaN   16.0  下载  \n",
       "30              赵奕钧; 邓大松           江淮论坛  2021-04-12 10:39 NaN  842.0  下载  \n",
       "31                   NaN         信息网络安全        2021-04-10 NaN   29.0  下载  \n",
       "32         毕凌燕; 张海璇; 左文明         科技管理研究        2021-04-10 NaN   48.0  下载  \n",
       "33       冯永; 钟将; 王茜; 李学明         中国电化教育        2021-04-08 NaN  400.0  下载  \n",
       "34                   杨绪辉         中国电化教育        2021-04-08 NaN  285.0  下载  \n",
       "35              丁美荣; 王同聚         电化教育研究        2021-04-01 NaN  751.0  下载  \n",
       "36                   刘一虹         现代电子技术        2021-03-30 NaN  291.0  下载  \n",
       "37              王彦秋; 冯英伟         现代电子技术        2021-03-30 NaN  692.0  下载  \n",
       "38               郭娜; 魏荣凯         现代电子技术        2021-03-30 NaN  479.0  下载  \n",
       "39                    赵朴           出版广角        2021-03-30 NaN   73.0  下载  \n",
       "40                    李骏  河北大学学报(自然科学版)        2021-03-25 NaN   11.0  下载  \n",
       "41      赵学武;吴宁;王军;阮利;李玲玲       计算机科学与探索  2021-03-23 16:56 NaN  342.0  下载  \n",
       "42                    殷慧         舰船科学技术        2021-03-23 NaN   41.0  下载  \n",
       "43                    解莹         舰船科学技术        2021-03-23 NaN   36.0  下载  \n",
       "44               王韫; 徐迎庆           包装工程        2021-03-20 NaN  176.0  下载  \n",
       "45         晋赞霞; 覃京燕; 殷绪成           包装工程        2021-03-20 NaN   43.0  下载  \n",
       "46     梁敏; 秦海波; 覃京燕; 殷绪成           包装工程        2021-03-20 NaN  127.0  下载  \n",
       "47               田鹏颖; 周鑫         宁夏社会科学        2021-03-20 NaN  275.0  下载  \n",
       "48           章琦; 章玮; 白正玉         中国全科医学        2021-03-18 NaN  203.0  下载  \n",
       "49       唐兆;董少迪;罗仁;蒋涛;邓锐       交通运输工程学报  2021-03-16 11:06 NaN  237.0  下载  "
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 抓取相关重要信息\n",
    "element = driver.find_element_by_id('gridTable')\n",
    "page_html = element.get_attribute('innerHTML')\n",
    "页面数据 = pd.read_html(page_html)[0]\n",
    "页面数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'下一页'"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 翻页\n",
    "element = driver.find_element_by_id('PageNext')\n",
    "element.get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1/28'"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 跳转上限\n",
    "element = driver.find_element_by_xpath('//span[@class=\"countPageMark\"]')\n",
    "page_str = element.get_attribute('innerHTML')\n",
    "page_str "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['1', '28']"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "page_int = page_str.split('/')\n",
    "page_int"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]\n"
     ]
    }
   ],
   "source": [
    "pages = list(range(1,int(page_int[1])+1))\n",
    "print(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 取前18页\n",
    "pages = list(range(1,19))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "表格_html = dict()\n",
    "main_content =\"\"\n",
    "element = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 函数（翻页）\n",
    "def process_pages (pages):\n",
    "    for p in pages:\n",
    "        print (p,end='\\t')\n",
    "        # 定位到“下一页”的按钮 ——> 点击\n",
    "        跳转 = driver.find_element_by_id('PageNext')\n",
    "        跳转.click()\n",
    "        # 设定休息的时间 ——> 避免爬虫被禁报错、以及出现验证码\n",
    "        time.sleep(15+20*random())\n",
    "        # 获取含有页面主要数据的表格\n",
    "        element = driver.find_element_by_id('gridTable')\n",
    "        main_content = element.get_attribute('innerHTML')\n",
    "        表格_html[p] = main_content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\t13\t14\t15\t16\t17\t18\t"
     ]
    }
   ],
   "source": [
    "process_pages(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>html_snippets</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                        html_snippets\n",
       "1   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "2   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "3   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "4   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "5   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "6   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "7   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "8   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "9   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "10  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "11  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "12  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "13  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "14  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "15  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "16  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "17  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "18  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ..."
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame([表格_html]).T\n",
    "df.columns = [\"html_snippets\"]\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "网站 = \"中国知网\"\n",
    "# 指定内容输出的位置\n",
    "fn = { \"output\" : { \"htm_snippets\": \"data/知网_{网站}.tsv\"}\n",
    "     }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 保存页面内容的csv文件\n",
    "filename = fn [\"output\"] [\"htm_snippets\"] \n",
    "df.to_csv(filename.format(网站=网站), sep=\"\\t\", encoding=\"utf8\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "l_df = []\n",
    "for p in pages:\n",
    "    表格 = pd.read_html(表格_html[p])[0]\n",
    "    l_df.append(表格)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>基于大数据与信息技术的拖拉机零部件供应链</td>\n",
       "      <td>王弥</td>\n",
       "      <td>农机化研究</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>智能制造和大数据挖掘在农业机械设计中的应用</td>\n",
       "      <td>杜伯阳</td>\n",
       "      <td>农机化研究</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>人工智能教育的含义界定与原理挖掘</td>\n",
       "      <td>彭绍东</td>\n",
       "      <td>中国电化教育</td>\n",
       "      <td>2021-06-08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>218.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>人工智能应用场景的界定与开发</td>\n",
       "      <td>李梦薇; 徐峰; 高芳</td>\n",
       "      <td>中国科技论坛</td>\n",
       "      <td>2021-06-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>近5年图情SSCI期刊人工智能伦理研究文献分析与启示</td>\n",
       "      <td>黄崑; 徐晓婷; 黎安润泽; 徐峰</td>\n",
       "      <td>现代情报</td>\n",
       "      <td>2021-06-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>405.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>895</th>\n",
       "      <td>946</td>\n",
       "      <td>网络大数据平台中的特征数据分类系统设计与实现</td>\n",
       "      <td>张科星</td>\n",
       "      <td>现代电子技术</td>\n",
       "      <td>2017-04-15</td>\n",
       "      <td>9.0</td>\n",
       "      <td>153.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>896</th>\n",
       "      <td>947</td>\n",
       "      <td>云平台下海量数据冲击中的可视化调度平台设计</td>\n",
       "      <td>聂晶; 石中坚</td>\n",
       "      <td>现代电子技术</td>\n",
       "      <td>2017-04-15</td>\n",
       "      <td>3.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>897</th>\n",
       "      <td>948</td>\n",
       "      <td>大数据技术在海量测震数据中的研究应用</td>\n",
       "      <td>郭凯; 黄金刚; 彭克银; 庞丽娜</td>\n",
       "      <td>地震研究</td>\n",
       "      <td>2017-04-15</td>\n",
       "      <td>7.0</td>\n",
       "      <td>88.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>898</th>\n",
       "      <td>949</td>\n",
       "      <td>大数据时代教学新模式:知识传授与内化互补融合</td>\n",
       "      <td>邹娟娟</td>\n",
       "      <td>江西社会科学</td>\n",
       "      <td>2017-04-15</td>\n",
       "      <td>10.0</td>\n",
       "      <td>486.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>899</th>\n",
       "      <td>950</td>\n",
       "      <td>大数据背景下视觉传播的认知驱动创新研究</td>\n",
       "      <td>席涛</td>\n",
       "      <td>西南民族大学学报(人文社科版)</td>\n",
       "      <td>2017-04-10</td>\n",
       "      <td>3.0</td>\n",
       "      <td>455.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>950 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Unnamed: 0                          篇名                 作者  \\\n",
       "0             1        基于大数据与信息技术的拖拉机零部件供应链                 王弥   \n",
       "1             2       智能制造和大数据挖掘在农业机械设计中的应用                杜伯阳   \n",
       "2             3            人工智能教育的含义界定与原理挖掘                彭绍东   \n",
       "3             4              人工智能应用场景的界定与开发        李梦薇; 徐峰; 高芳   \n",
       "4             5  近5年图情SSCI期刊人工智能伦理研究文献分析与启示  黄崑; 徐晓婷; 黎安润泽; 徐峰   \n",
       "..          ...                         ...                ...   \n",
       "895         946      网络大数据平台中的特征数据分类系统设计与实现                张科星   \n",
       "896         947       云平台下海量数据冲击中的可视化调度平台设计            聂晶; 石中坚   \n",
       "897         948          大数据技术在海量测震数据中的研究应用  郭凯; 黄金刚; 彭克银; 庞丽娜   \n",
       "898         949      大数据时代教学新模式:知识传授与内化互补融合                邹娟娟   \n",
       "899         950         大数据背景下视觉传播的认知驱动创新研究                 席涛   \n",
       "\n",
       "                  刊名        发表时间    被引     下载  操作  \n",
       "0              农机化研究  2021-06-15   NaN    NaN  下载  \n",
       "1              农机化研究  2021-06-15   NaN    NaN  下载  \n",
       "2             中国电化教育  2021-06-08   NaN  218.0  下载  \n",
       "3             中国科技论坛  2021-06-05   NaN    NaN  下载  \n",
       "4               现代情报  2021-06-01   NaN  405.0  下载  \n",
       "..               ...         ...   ...    ...  ..  \n",
       "895           现代电子技术  2017-04-15   9.0  153.0  下载  \n",
       "896           现代电子技术  2017-04-15   3.0   59.0  下载  \n",
       "897             地震研究  2017-04-15   7.0   88.0  下载  \n",
       "898           江西社会科学  2017-04-15  10.0  486.0  下载  \n",
       "899  西南民族大学学报(人文社科版)  2017-04-10   3.0  455.0  下载  \n",
       "\n",
       "[950 rows x 8 columns]"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_url_out = pd.concat(l_df).reset_index(drop=True)\n",
    "df_总表 = 页面数据.append(df_url_out)\n",
    "df_总表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>基于大数据与信息技术的拖拉机零部件供应链</td>\n",
       "      <td>王弥</td>\n",
       "      <td>农机化研究</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>智能制造和大数据挖掘在农业机械设计中的应用</td>\n",
       "      <td>杜伯阳</td>\n",
       "      <td>农机化研究</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>人工智能教育的含义界定与原理挖掘</td>\n",
       "      <td>彭绍东</td>\n",
       "      <td>中国电化教育</td>\n",
       "      <td>2021-06-08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>218.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>人工智能应用场景的界定与开发</td>\n",
       "      <td>李梦薇; 徐峰; 高芳</td>\n",
       "      <td>中国科技论坛</td>\n",
       "      <td>2021-06-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>近5年图情SSCI期刊人工智能伦理研究文献分析与启示</td>\n",
       "      <td>黄崑; 徐晓婷; 黎安润泽; 徐峰</td>\n",
       "      <td>现代情报</td>\n",
       "      <td>2021-06-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>405.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>895</th>\n",
       "      <td>946</td>\n",
       "      <td>网络大数据平台中的特征数据分类系统设计与实现</td>\n",
       "      <td>张科星</td>\n",
       "      <td>现代电子技术</td>\n",
       "      <td>2017-04-15</td>\n",
       "      <td>9.0</td>\n",
       "      <td>153.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>896</th>\n",
       "      <td>947</td>\n",
       "      <td>云平台下海量数据冲击中的可视化调度平台设计</td>\n",
       "      <td>聂晶; 石中坚</td>\n",
       "      <td>现代电子技术</td>\n",
       "      <td>2017-04-15</td>\n",
       "      <td>3.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>897</th>\n",
       "      <td>948</td>\n",
       "      <td>大数据技术在海量测震数据中的研究应用</td>\n",
       "      <td>郭凯; 黄金刚; 彭克银; 庞丽娜</td>\n",
       "      <td>地震研究</td>\n",
       "      <td>2017-04-15</td>\n",
       "      <td>7.0</td>\n",
       "      <td>88.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>898</th>\n",
       "      <td>949</td>\n",
       "      <td>大数据时代教学新模式:知识传授与内化互补融合</td>\n",
       "      <td>邹娟娟</td>\n",
       "      <td>江西社会科学</td>\n",
       "      <td>2017-04-15</td>\n",
       "      <td>10.0</td>\n",
       "      <td>486.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>899</th>\n",
       "      <td>950</td>\n",
       "      <td>大数据背景下视觉传播的认知驱动创新研究</td>\n",
       "      <td>席涛</td>\n",
       "      <td>西南民族大学学报(人文社科版)</td>\n",
       "      <td>2017-04-10</td>\n",
       "      <td>3.0</td>\n",
       "      <td>455.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>950 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Unnamed: 0                          篇名                 作者  \\\n",
       "0             1        基于大数据与信息技术的拖拉机零部件供应链                 王弥   \n",
       "1             2       智能制造和大数据挖掘在农业机械设计中的应用                杜伯阳   \n",
       "2             3            人工智能教育的含义界定与原理挖掘                彭绍东   \n",
       "3             4              人工智能应用场景的界定与开发        李梦薇; 徐峰; 高芳   \n",
       "4             5  近5年图情SSCI期刊人工智能伦理研究文献分析与启示  黄崑; 徐晓婷; 黎安润泽; 徐峰   \n",
       "..          ...                         ...                ...   \n",
       "895         946      网络大数据平台中的特征数据分类系统设计与实现                张科星   \n",
       "896         947       云平台下海量数据冲击中的可视化调度平台设计            聂晶; 石中坚   \n",
       "897         948          大数据技术在海量测震数据中的研究应用  郭凯; 黄金刚; 彭克银; 庞丽娜   \n",
       "898         949      大数据时代教学新模式:知识传授与内化互补融合                邹娟娟   \n",
       "899         950         大数据背景下视觉传播的认知驱动创新研究                 席涛   \n",
       "\n",
       "                  刊名        发表时间    被引     下载  操作  \n",
       "0              农机化研究  2021-06-15   NaN    NaN  下载  \n",
       "1              农机化研究  2021-06-15   NaN    NaN  下载  \n",
       "2             中国电化教育  2021-06-08   NaN  218.0  下载  \n",
       "3             中国科技论坛  2021-06-05   NaN    NaN  下载  \n",
       "4               现代情报  2021-06-01   NaN  405.0  下载  \n",
       "..               ...         ...   ...    ...  ..  \n",
       "895           现代电子技术  2017-04-15   9.0  153.0  下载  \n",
       "896           现代电子技术  2017-04-15   3.0   59.0  下载  \n",
       "897             地震研究  2017-04-15   7.0   88.0  下载  \n",
       "898           江西社会科学  2017-04-15  10.0  486.0  下载  \n",
       "899  西南民族大学学报(人文社科版)  2017-04-10   3.0  455.0  下载  \n",
       "\n",
       "[950 rows x 8 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 将内容表格存在本地\n",
    "with pd.ExcelWriter('知网文章数据.xlsx',mode='w',engine=\"openpyxl\") as writer:  \n",
    "            df_总表.to_excel(writer,sheet_name=\"知网\")\n",
    "display(df_总表)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 下载refworks文件及原文"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n"
     ]
    }
   ],
   "source": [
    "# 导出refworks文件（.txt）和下载文章\n",
    "# 每次全选不能超过500篇，分2次进行\n",
    "pages = list(range(1,11))\n",
    "print(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 返回第一页\n",
    "driver.find_element_by_id('total').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 清除选中\n",
    "driver.find_element_by_xpath('//*[@id=\"gridTable\"]/div[1]/div[2]/div[1]/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 选中页面50篇 —> 翻页\n",
    "def process_choose(pages):\n",
    "    for p in pages:\n",
    "        print (p,end='\\t')\n",
    "        全选 = driver.find_element_by_id('selectCheckAll1')\n",
    "        全选.click()\n",
    "        time.sleep(15+20*random())\n",
    "        跳转 = driver.find_element_by_id('PageNext')\n",
    "        跳转.click()\n",
    "        time.sleep(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t"
     ]
    }
   ],
   "source": [
    "process_choose(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导出与分析 \n",
    "driver.find_element_by_xpath('//i[@class=\"icon-d\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导出文献\n",
    "driver.find_element_by_xpath('//i[@class=\"icon-r\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 点击Refworks\n",
    "driver.find_element_by_xpath('//a[@exporttype=\"Refworks\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-8D8283095CD2D3246CFBF4EE753ABEC4',\n",
       " 'CDwindow-1CD9E637B649BF5766B745C8B5E27E72',\n",
       " 'CDwindow-76303F9E7B0196950BA7E3B3787CD2C3']"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看所有的窗口信息\n",
    "driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-47-520070efe65b>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[2])\n"
     ]
    }
   ],
   "source": [
    "# 窗口切换\n",
    "driver.switch_to_window(driver.window_handles[2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导出 .txt文件\n",
    "driver.find_element_by_xpath('//i[@class=\"icon icon-export\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-49-0188c2a7ff70>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# 窗口切换\n",
    "driver.switch_to_window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 批量下载\n",
    "driver.find_element_by_xpath('//li[@class=\"bulkdownload export\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-51-1f3bb34cc9cb>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[3])\n"
     ]
    }
   ],
   "source": [
    "# 窗口切换\n",
    "driver.switch_to_window(driver.window_handles[3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 下载所选文献（500篇）\n",
    "driver.find_element_by_id('btn-download-all').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-53-0188c2a7ff70>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# 窗口切换\n",
    "driver.switch_to_window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 清除选择\n",
    "driver.find_element_by_xpath('//*[@id=\"gridTable\"]/div[1]/div[2]/div[1]/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.find_element_by_id('PageNext').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[10, 11, 12, 13, 14, 15, 16, 17, 18]\n"
     ]
    }
   ],
   "source": [
    "# 第二轮下载\n",
    "pages = list(range(10,19))\n",
    "print(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10\t11\t12\t13\t14\t15\t16\t17\t18\t"
     ]
    }
   ],
   "source": [
    "process_choose(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导出与分析 \n",
    "driver.find_element_by_xpath('//i[@class=\"icon-d\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导出文献\n",
    "driver.find_element_by_xpath('//i[@class=\"icon-r\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 点击Refworks\n",
    "driver.find_element_by_xpath('//a[@exporttype=\"Refworks\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-8D8283095CD2D3246CFBF4EE753ABEC4',\n",
       " 'CDwindow-1CD9E637B649BF5766B745C8B5E27E72',\n",
       " 'CDwindow-76303F9E7B0196950BA7E3B3787CD2C3',\n",
       " 'CDwindow-04AC7C2142B82E9212C4D1F1C312DEFF',\n",
       " 'CDwindow-E563C48D1DBA037EBF38E4136C95541F']"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the handles of all windows currently known to the driver.\n",
    "driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-62-ecda85629064>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[4])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the window at index 4 of driver.window_handles.\n",
    "# switch_to.window replaces the deprecated driver.switch_to_window.\n",
    "driver.switch_to.window(driver.window_handles[4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export the .txt file: click the <i> element whose class is 'icon icon-export'.\n",
    "# find_element('xpath', ...) replaces find_element_by_xpath, removed in Selenium 4.3.\n",
    "driver.find_element('xpath', '//i[@class=\"icon icon-export\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-64-0188c2a7ff70>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# Switch back to the window at index 1 of driver.window_handles.\n",
    "# switch_to.window replaces the deprecated driver.switch_to_window.\n",
    "driver.switch_to.window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Batch download: click the <li> whose class is 'bulkdownload export'.\n",
    "# find_element('xpath', ...) replaces find_element_by_xpath, removed in Selenium 4.3.\n",
    "driver.find_element('xpath', '//li[@class=\"bulkdownload export\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-66-f0765e4dfd3b>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[5])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the window at index 5 of driver.window_handles.\n",
    "# switch_to.window replaces the deprecated driver.switch_to_window.\n",
    "driver.switch_to.window(driver.window_handles[5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the selected documents (450 articles).\n",
    "# find_element('id', ...) replaces find_element_by_id, removed in Selenium 4.3.\n",
    "driver.find_element('id', 'btn-download-all').click()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
